#########################################
## Cell Med Review                     ##
## Gastric Bypass/ UOS                 ##
## Random Descriptive Stats/ Logit     ##
## Ben Krick                           ## 
## January 27th, 2022                  ##
#########################################

setwd("/Volumes/dfs/Playdon Research Group/Team_Members_Students/Ben Krick/UOS: GB : Ceramide Study /3_Projects/Gastric_Bypass_Cell_Med")

#Random Descriptive Stats 



library(readr)
library(dplyr)
require(table1)
library(broom)
library(tidyverse)

#load wide
wide_data <- read_csv("/Volumes/dfs/Playdon Research Group/Team_Members_Students/Ben Krick/UOS: GB : Ceramide Study /2_Intermediate Datasets/wide.csv")


wide_data$perc_wtkg_2_1 <-  (wide_data$wtkg_2 - wide_data$wtkg_1)/ wide_data$wtkg_1
wide_data$perc_wtkg_4_1 <-  (wide_data$wtkg_4 - wide_data$wtkg_1)/ wide_data$wtkg_1

#make group numeric 
wide_data$group <- ifelse(wide_data$grp_1 %in% "C", 0, NA)
wide_data$group <- ifelse(wide_data$grp_1 %in% "N", 0, wide_data$group)
wide_data$group <- ifelse(wide_data$grp_1 %in% "S", 1, wide_data$group)

# Run correlation for % body weight lost from baseline to 2-years and study group
cor <- cor.test(wide_data$perc_wtkg_2_1, wide_data$group, method=c("pearson"), use = "complete.obs")

# Run correlation for % body weight lost from baseline to 12-years and study group
cor <- cor.test(wide_data$perc_wtkg_4_1, wide_data$group, method=c("pearson"), use = "complete.obs")
cor$p.value

#baseline ceramides 
# ceramides <- c("log_Cer(d18:0/16:0)", "log_Cer(d18:0/18:0)", "log_Cer(d18:0/20:0)",
#                "log_Cer(d18:0/22:0)",  "log_Cer(d18:0/24:0)", "log_Cer(d18:0/24:1)",
#                "log_Cer(d18:1/16:0)",  "log_Cer(d18:1/17:0)", "log_Cer(d18:1/18:0)",
#                "log_Cer(d18:1/20:0)",     "log_Cer(d18:1/22:0)",     "log_Cer(d18:1/24:0)",
#                "log_Cer(d18:1/24:1)",     "log_GlcCer (d18:1/16:0)", "log_GlcCer (d18:1/18:0)",
#                "log_GlcCer (d18:1/20:0)", "log_GlcCer (d18:1/22:0)", "log_GlcCer (d18:1/24:0)",
#                "log_GlcCer (d18:1/24:1)", "log_PC(34:2)",            "log_PC(36:0)" ,
#                "log_PC(36:1)",            "log_PC(36:2)"  ,          "log_PC(36:3)" ,
#                "log_PC(36:4)",        "log_SM(d18:0/16:0)",      "log_SM(d18:0/18:0)",
#                "log_SM(d18:0/20:0)",      "log_SM(d18:0/22:0)",      "log_SM(d18:0/24:0)",
#                "log_SM(d18:0/24:1)",      "log_SM(d18:1/16:0)",      "log_SM(d18:1/18:0)",
#                "log_SM(d18:1/20:0)",      "log_SM(d18:1/22:0)",      "log_SM(d18:1/24:0)",
#                "log_SM(d18:1/24:1)",      "log_Sphinganine",        "log_Sphingosine")


# Correlation matrix for baseline ceramides, dihydroceramides, LDL, VLDL, HDL, triglycerides


# x <- wide_data[c("log_Cer(d18:0/16:0)","log_Cer(d18:0/18:0)", "log_Cer(d18:0/20:0)", "log_Cer(d18:0/22:0)","log_Cer(d18:0/24:0)",
#                  "log_Cer(d18:0/24:1)", "log_Cer(d18:1/16:0)", "log_Cer(d18:1/18:0)", "log_Cer(d18:1/20:0)", "log_Cer(d18:1/22:0)",
#                  "log_Cer(d18:1/24:0)", "log_Cer(d18:1/24:1)", "ldl_calc_1", "vldl_1", "hdl_1", "triglyceride_1")]


surgery <- wide_data[wide_data$grp_1 %in% "S",]

x <- surgery[c("Cer(d18:0/16:0)","Cer(d18:0/18:0)", "Cer(d18:0/20:0)", "Cer(d18:0/22:0)","Cer(d18:0/24:0)",
               "Cer(d18:0/24:1)", "Cer(d18:1/16:0)", "Cer(d18:1/18:0)", "Cer(d18:1/20:0)", "Cer(d18:1/22:0)",
               "Cer(d18:1/24:0)", "Cer(d18:1/24:1)", "ldl_meas_1", "vldl_1", "hdl_1", "triglyceride_1")]


cor_baseline <- cor(x, method = "pearson", use = "complete.obs")
# write.csv(as.data.frame(cor_baseline), "Results/cor_surgery_baseline.csv")

library(corrplot)

# tiff("cor_surgery_baseline.tiff", units="in", width=5, height=5, res=300)
# corrplot(cor_baseline, type = "full", order = "alphabet", 
#          tl.col = "black", tl.srt = 45)
# dev.off()

# Correlation analysis for 2-years

x <- surgery[c("Cer(d18:0/16:0)_2","Cer(d18:0/18:0)_2", "Cer(d18:0/20:0)_2", "Cer(d18:0/22:0)_2","Cer(d18:0/24:0)_2",
               "Cer(d18:0/24:1)_2", "Cer(d18:1/16:0)_2", "Cer(d18:1/18:0)_2", "Cer(d18:1/20:0)_2", "Cer(d18:1/22:0)_2",
               "Cer(d18:1/24:0)_2", "Cer(d18:1/24:1)_2", "ldl_meas_2", "vldl_2", "hdl_2", "triglyceride_2")]
`
cor_2 <- cor(x, method = "pearson", use = "complete.obs")
# write.csv(as.data.frame(cor_2), "Results/cor_surgery_2.csv")
# 
# tiff("cor_surgery_2.tiff", units="in", width=5, height=5, res=300)
# corrplot(cor_2, type = "full", order = "alphabet", 
#          tl.col = "black", tl.srt = 45)
# dev.off()

# % change in these parameters from baseline to 2-years

wide_data$`Cer(d18:0/16:0)_1_2` <- (wide_data$`Cer(d18:0/16:0)_2` - wide_data$`Cer(d18:0/16:0)`)/ wide_data$`Cer(d18:0/16:0)`
wide_data$`Cer(d18:0/18:0)_1_2` <- (wide_data$`Cer(d18:0/18:0)_2` - wide_data$`Cer(d18:0/18:0)`)/ wide_data$`Cer(d18:0/18:0)`
wide_data$`Cer(d18:0/20:0)_1_2` <- (wide_data$`Cer(d18:0/20:0)_2` - wide_data$`Cer(d18:0/20:0)`)/ wide_data$`Cer(d18:0/20:0)`
wide_data$`Cer(d18:0/22:0)_1_2` <- (wide_data$`Cer(d18:0/22:0)_2` - wide_data$`Cer(d18:0/22:0)`)/ wide_data$`Cer(d18:0/22:0)`
wide_data$`Cer(d18:0/24:0)_1_2` <- (wide_data$`Cer(d18:0/24:0)_2` - wide_data$`Cer(d18:0/24:0)`)/ wide_data$`Cer(d18:0/24:0)`
wide_data$`Cer(d18:0/24:1)_1_2` <- (wide_data$`Cer(d18:0/24:1)_2` - wide_data$`Cer(d18:0/24:1)`)/ wide_data$`Cer(d18:0/24:1)`

wide_data$`Cer(d18:1/16:0)_1_2` <- (wide_data$`Cer(d18:1/16:0)_2` - wide_data$`Cer(d18:1/16:0)`)/ wide_data$`Cer(d18:1/16:0)`
wide_data$`Cer(d18:1/18:0)_1_2` <- (wide_data$`Cer(d18:1/18:0)_2` - wide_data$`Cer(d18:1/18:0)`)/ wide_data$`Cer(d18:1/18:0)`
wide_data$`Cer(d18:1/20:0)_1_2` <- (wide_data$`Cer(d18:1/20:0)_2` - wide_data$`Cer(d18:1/20:0)`)/ wide_data$`Cer(d18:1/20:0)`
wide_data$`Cer(d18:1/22:0)_1_2` <- (wide_data$`Cer(d18:1/22:0)_2` - wide_data$`Cer(d18:1/22:0)`)/ wide_data$`Cer(d18:1/22:0)`
wide_data$`Cer(d18:1/24:0)_1_2` <- (wide_data$`Cer(d18:1/24:0)_2` - wide_data$`Cer(d18:1/24:0)`)/ wide_data$`Cer(d18:1/24:0)`
wide_data$`Cer(d18:1/24:1)_1_2` <- (wide_data$`Cer(d18:1/24:1)_2` - wide_data$`Cer(d18:1/24:1)`)/ wide_data$`Cer(d18:1/24:1)`


wide_data$ldl_meas_1_2 <- (wide_data$ldl_meas_2 - wide_data$ldl_meas_1)/ wide_data$ldl_meas_1
wide_data$vldl_1_2 <- (wide_data$vldl_2 - wide_data$vldl_1)/ wide_data$vldl_1
wide_data$hdl_1_2 <- (wide_data$hdl_2 - wide_data$hdl_1)/ wide_data$hdl_1
wide_data$triglyceride_1_2 <- (wide_data$triglyceride_2 - wide_data$triglyceride_1)/ wide_data$triglyceride_1

surgery <- wide_data[wide_data$grp_1 %in% "S",]


x <- surgery[c("Cer(d18:0/16:0)_1_2","Cer(d18:0/18:0)_1_2", "Cer(d18:0/20:0)_1_2", "Cer(d18:0/22:0)_1_2","Cer(d18:0/24:0)_1_2",
               "Cer(d18:0/24:1)_1_2", "Cer(d18:1/16:0)_1_2", "Cer(d18:1/18:0)_1_2", "Cer(d18:1/20:0)_1_2", "Cer(d18:1/22:0)_1_2",
               "Cer(d18:1/24:0)_1_2", "Cer(d18:1/24:1)_1_2", "ldl_meas_1_2", "vldl_1_2", "hdl_1_2", "triglyceride_1_2")]
cor_2_1 <- cor(x, method = "pearson", use = "complete.obs")
# write.csv(as.data.frame(cor_2_1), "Results/cor_surgery_2_1.csv")
# 
# 
# tiff("cor_surgery_2_1.tiff", units="in", width=5, height=5, res=300)
# corrplot(cor_2_1, type = "full", order = "alphabet", 
#          tl.col = "black", tl.srt = 45)
# dev.off()



# # Run logistic regression with % change in ceramide from 2 to 12 years as exposure, outcome = change in body weight; more than 10% = 1, less than 10% body weight gain = 0

wide_data$`Cer(d18:0/16:0)_4_2` <- (wide_data$`Cer(d18:0/16:0)_4` - wide_data$`Cer(d18:0/16:0)_2`)/ wide_data$`Cer(d18:0/16:0)_2`
wide_data$`Cer(d18:0/18:0)_4_2` <- (wide_data$`Cer(d18:0/18:0)_4` - wide_data$`Cer(d18:0/18:0)_2`)/ wide_data$`Cer(d18:0/18:0)_2`
wide_data$`Cer(d18:0/20:0)_4_2` <- (wide_data$`Cer(d18:0/20:0)_4` - wide_data$`Cer(d18:0/20:0)_2`)/ wide_data$`Cer(d18:0/20:0)_2`
wide_data$`Cer(d18:0/22:0)_4_2` <- (wide_data$`Cer(d18:0/22:0)_4` - wide_data$`Cer(d18:0/22:0)_2`)/ wide_data$`Cer(d18:0/22:0)_2`
wide_data$`Cer(d18:0/24:0)_4_2` <- (wide_data$`Cer(d18:0/24:0)_4` - wide_data$`Cer(d18:0/24:0)_2`)/ wide_data$`Cer(d18:0/24:0)_2`
wide_data$`Cer(d18:0/24:1)_4_2` <- (wide_data$`Cer(d18:0/24:1)_4` - wide_data$`Cer(d18:0/24:1)_2`)/ wide_data$`Cer(d18:0/24:1)_2`

wide_data$`Cer(d18:1/16:0)_4_2` <- (wide_data$`Cer(d18:1/16:0)_4` - wide_data$`Cer(d18:1/16:0)_2`)/ wide_data$`Cer(d18:1/16:0)_2`
wide_data$`Cer(d18:1/18:0)_4_2` <- (wide_data$`Cer(d18:1/18:0)_4` - wide_data$`Cer(d18:1/18:0)_2`)/ wide_data$`Cer(d18:1/18:0)_2`
wide_data$`Cer(d18:1/20:0)_4_2` <- (wide_data$`Cer(d18:1/20:0)_4` - wide_data$`Cer(d18:1/20:0)_2`)/ wide_data$`Cer(d18:1/20:0)_2`
wide_data$`Cer(d18:1/22:0)_4_2` <- (wide_data$`Cer(d18:1/22:0)_4` - wide_data$`Cer(d18:1/22:0)_2`)/ wide_data$`Cer(d18:1/22:0)_2`
wide_data$`Cer(d18:1/24:0)_4_2` <- (wide_data$`Cer(d18:1/24:0)_4` - wide_data$`Cer(d18:1/24:0)_2`)/ wide_data$`Cer(d18:1/24:0)_2`
wide_data$`Cer(d18:1/24:1)_4_2` <- (wide_data$`Cer(d18:1/24:1)_4` - wide_data$`Cer(d18:1/24:1)_2`)/ wide_data$`Cer(d18:1/24:1)_2`

wide_data$perc_wtkg_4_2 <-  (wide_data$wtkg_4 - wide_data$wtkg_2)/ wide_data$wtkg_2

wide_data$binary_weight_4_2 <- ifelse(wide_data$perc_wtkg_4_2  > .1, 1, NA)
wide_data$binary_weight_4_2 <- ifelse(wide_data$perc_wtkg_4_2  < .1, 0, wide_data$binary_weight_4_2)


ceramides <- c("Cer(d18:0/16:0)_4_2","Cer(d18:0/18:0)_4_2", "Cer(d18:0/20:0)_4_2", "Cer(d18:0/22:0)_4_2","Cer(d18:0/24:0)_4_2",
               "Cer(d18:0/24:1)_4_2", "Cer(d18:1/16:0)_4_2", "Cer(d18:1/18:0)_4_2", "Cer(d18:1/20:0)_4_2", "Cer(d18:1/22:0)_4_2",
               "Cer(d18:1/24:0)_4_2", "Cer(d18:1/24:1)_4_2")
#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

# run the model in a loop of ceramides (original model in paper)
for (i in ceramides){
  exposure = i 
  model <- glm(binary_weight_4_2 ~ get(exposure), 
               na.action = na.exclude,
               data=wide_data, family = binomial)
  temp_results<- cbind(exp(coef(model))[2], exp(confint(model))[2,1], exp(confint(model))[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("OR", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#provide column names
colnames(results) <- c("ESTIMATE", "CI_L", "CI_U", "PVALUE")
#add BH and organize 
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

#write_csv(results, "Results/12_2_weight_ceramides.csv")


# # # Weight Change analysis for just the surgery group 

# Descriptive Stats
surgery <- wide_data[wide_data$grp_1 %in% "S",]
# surgery <- wide_data[wide_data$grp_2 %in% "S",]


#percentage change in weight from baseline to 2-years;   

surgery$weight_baseline_2 <- (surgery$wtkg_2 - surgery$wtkg_1)/ surgery$wtkg_1
hist(surgery$weight_baseline_2, main = "Weight Percent Change: Baseline to 2 years")

table(surgery$weight_baseline_2 > 0)

# baseline to 12-years;
surgery$weight_baseline_12 <- (surgery$wtkg_4 - surgery$wtkg_1)/ surgery$wtkg_1
hist(surgery$weight_baseline_12, main = "Weight Percent Change: Baseline to 12 years")

table(surgery$weight_baseline_12 > 0)

# 2-years to 12-years
surgery$weight_2_12 <- (surgery$wtkg_4 - surgery$wtkg_2)/ surgery$wtkg_2
hist(surgery$weight_2_12, main = "Weight Percent Change: 2 to 12 years")

table(surgery$weight_2_12 > 0)

#Bmi gained from baseline to 2-years; baseline to 12-years; 2-years to 12-years – present data in table or histogram
surgery$bmi_baseline_2 <- (surgery$bmi_2 - surgery$bmi_1)/ surgery$bmi_1
hist(surgery$bmi_baseline_2, main = "BMI Percent Change: Baseline to 2 years")

table(surgery$bmi_baseline_2 > 0)

surgery$bmi_baseline_12 <- (surgery$bmi_4 - surgery$bmi_1)/ surgery$bmi_1
hist(surgery$bmi_baseline_12, main = "BMI Percent Change: Baseline to 12 years")

table(surgery$bmi_baseline_12 > 0)

surgery$bmi_2_12 <- (surgery$bmi_4 - surgery$bmi_2)/ surgery$bmi_2
hist(surgery$bmi_2_12, main = "BMI Percent Change: 2 to 12 years")
table(surgery$bmi_2_12 > 0)

# # # change in weight from 2 to 12 ~ 2 year Ceramides

two_ceramides <- colnames(wide_data)[grep("^log_.*_2$", colnames(wide_data))]


# # # No Adjustment 

#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

#provide column names
colnames(results) <- c("EST", "CI_L", "CI_U", "PVALUE")


#loop through ceramides 
for (i in two_ceramides){
  exposure = i 
  model <- lm(weight_2_12 ~ get(exposure), 
              na.action = na.exclude,
              data=surgery)
  temp_results<- cbind(coef(model)[2], confint(model)[2,1], confint(model)[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("EST", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#add BH and organize 
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 
results <- results[order(results$PVALUE),]

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

# write_csv(results, "Results/weight_2_12_ceramides2_unadjusted.csv")

# # # Minimally Adjusted 

#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

#provide column names
colnames(results) <- c("EST", "CI_L", "CI_U", "PVALUE")


#loop through ceramides 
for (i in two_ceramides){
  exposure = i 
  model <- lm(weight_2_12 ~ get(exposure) + age_1 + sexint + bmi_1, 
              na.action = na.exclude,
              data=surgery)
  temp_results<- cbind(coef(model)[2], confint(model)[2,1], confint(model)[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("EST", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#add BH and organize 
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 
results <- results[order(results$PVALUE),]

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

# write_csv(results, "Results/weight_2_12_ceramides2_min_adjusted.csv")


# # # Fully Adjusted 

#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

#provide column names
colnames(results) <- c("EST", "CI_L", "CI_U", "PVALUE")

#loop through ceramides
for (i in two_ceramides){
  exposure = i 
  model <- lm(weight_2_12 ~ get(exposure) + age_1 + sexint + bmi_1 + race + marrstat_1 + income_1 + smoker_2 + bpmeds_2 + ldl_meas_2 + vldl_2 + hdl_2 + triglyceride_2, 
              na.action = na.exclude,
              data= surgery) 
  temp_results<- cbind(coef(model)[2], confint(model)[2,1], confint(model)[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("EST", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#add BH and organize 
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 
results <- results[order(results$PVALUE),]

#results$model <- "2 year Ceramides & Percentage Change of Weight 2 to 12"

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

# write_csv(results, "Results/weight_2_12_ceramides2_full_adjust.csv")


# # # change in weight from 2 to 12 ~ 12 year Ceramides

two_ceramides <- colnames(wide_data)[grep("^log_.*_4$", colnames(wide_data))]


# # # No Adjustment 

#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

#provide column names
colnames(results) <- c("EST", "CI_L", "CI_U", "PVALUE")


#loop through ceramides 
for (i in two_ceramides){
  exposure = i 
  model <- lm(weight_2_12 ~ get(exposure), 
              na.action = na.exclude,
              data=surgery)
  temp_results<- cbind(coef(model)[2], confint(model)[2,1], confint(model)[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("EST", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#add BH and organize 
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 
results <- results[order(results$PVALUE),]

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

write_csv(results, "Results/weight_2_12_ceramides4_unadjusted.csv")

# # # Minimally Adjusted 

#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

#provide column names
colnames(results) <- c("EST", "CI_L", "CI_U", "PVALUE")


#loop through ceramides 
for (i in two_ceramides){
  exposure = i 
  model <- lm(weight_2_12 ~ get(exposure) + age_1 + sexint + bmi_1, 
              na.action = na.exclude,
              data=surgery)
  temp_results<- cbind(coef(model)[2], confint(model)[2,1], confint(model)[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("EST", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#add BH and organize 
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 
results <- results[order(results$PVALUE),]

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

write_csv(results, "Results/weight_2_12_ceramides4_min_adjusted.csv")


# # # Fully Adjusted 

#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

#provide column names
colnames(results) <- c("EST", "CI_L", "CI_U", "PVALUE")

#loop through ceramides
for (i in two_ceramides){
  exposure = i 
  model <- lm(weight_2_12 ~ get(exposure) + age_1 + sexint + bmi_1 + race + marrstat_1 + income_1 + smoker_2 + bpmeds_2 + ldl_meas_2 + vldl_2 + hdl_2 + triglyceride_2,
              na.action = na.exclude,
              data= surgery) 
  temp_results<- cbind(coef(model)[2], confint(model)[2,1], confint(model)[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("EST", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#add BH and organize 
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 
results <- results[order(results$PVALUE),]

#results$model <- "2 year Ceramides & Percentage Change of Weight 2 to 12"

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

write_csv(results, "Results/weight_2_12_ceramides4_full_adjust.csv")


###
###
###

# # # Diabetes remission logistic regression

# just people with surgery and diabetes at baseline 

surgery_diab_data <- wide_data[wide_data$grp_1 %in% "S" & wide_data$dmstatus_1 %in% "Y",]

# code diabete remission 

table(surgery_diab_data$dmremission_12)
# N  U  Y 
# 22  3 66 
surgery_diab_data$dmremission_12 <- ifelse(surgery_diab_data$dmremission_12  %in% "Y", 1, surgery_diab_data$dmremission_12)
surgery_diab_data$dmremission_12 <- ifelse(surgery_diab_data$dmremission_12  %in% "N", 0, surgery_diab_data$dmremission_12)
surgery_diab_data$dmremission_12 <- ifelse(surgery_diab_data$dmremission_12  %in% "U", 0, surgery_diab_data$dmremission_12)

table(surgery_diab_data$dmremission_12)
# 0  1 
# 22 66 
surgery_diab_data$dmremission_12 <- as.factor(surgery_diab_data$dmremission_12)

# clean duration (impute mean if NA)
surgery_diab_data$diab_duration <- surgery_diab_data$age_1 - surgery_diab_data$diabageon_1
surgery_diab_data$diab_duration <- ifelse(is.na(surgery_diab_data$diab_duration), mean(surgery_diab_data$diab_duration, na.rm = T), surgery_diab_data$diab_duration)



####
# baseline year ceramdies 
ceramides <- c("scale_log_Cer(d18:0/16:0)_1","scale_log_Cer(d18:0/18:0)_1", "scale_log_Cer(d18:0/20:0)_1", "scale_log_Cer(d18:0/22:0)_1","scale_log_Cer(d18:0/24:0)_1",
               "scale_log_Cer(d18:0/24:1)_1", "scale_log_Cer(d18:1/16:0)_1", "scale_log_Cer(d18:1/18:0)_1", "scale_log_Cer(d18:1/20:0)_1", "scale_log_Cer(d18:1/22:0)_1",
               "scale_log_Cer(d18:1/24:0)_1", "scale_log_Cer(d18:1/24:1)_1")

#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

# run the model in a loop of ceramides (original model in paper)
for (i in ceramides){
  exposure = i 
  model <- glm(dmremission_12 ~ get(exposure) + age_1 + sexint + bmi_1 + race, 
               # na.action = na.exclude,
               data=surgery_diab_data, family = binomial)
  temp_results<- cbind(exp(coef(model))[2], exp(confint(model))[2,1], exp(confint(model))[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("OR", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#provide column names
colnames(results) <- c("OR", "CI_L", "CI_U", "PVALUE")

#add BH and organize 
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 
results <- results[order(results$PVALUE),]

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

write_csv(results, "Results/diabetes_remission_model1_baseline_ceramides.csv")

# # # model 2: more adjustments (but without hb1ac)

#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

# run the model in a loop of ceramides (original model in paper)
for (i in ceramides){
  exposure = i 
  model <- glm(dmremission_12 ~ get(exposure) + age_1 + sexint + bmi_1 + race + diab_duration + diabmed_1, 
               # na.action = na.exclude,
               data=surgery_diab_data, family = binomial)
  temp_results<- cbind(exp(coef(model))[2], exp(confint(model))[2,1], exp(confint(model))[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("OR", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#provide column names
colnames(results) <- c("OR", "CI_L", "CI_U", "PVALUE")
#add BH and organize 
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 
results <- results[order(results$PVALUE),]

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

write_csv(results, "Results/diabetes_remission_model2_without_hba1c_1_baseline_ceramides.csv")

# # # model 2: more adjustments 

#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

# run the model in a loop of ceramides (original model in paper)
for (i in ceramides){
  exposure = i 
  model <- glm(dmremission_12 ~ get(exposure) + age_1 + sexint + bmi_1 + race + diab_duration + diabmed_1 + hba1c_1, 
               # na.action = na.exclude,
               data=surgery_diab_data, family = binomial)
  temp_results<- cbind(exp(coef(model))[2], exp(confint(model))[2,1], exp(confint(model))[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("OR", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#provide column names
colnames(results) <- c("OR", "CI_L", "CI_U", "PVALUE")
#order
#add BH and organize 
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 
results <- results[order(results$PVALUE),]

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

write_csv(results, "Results/diabetes_remission_model2_hba1c_1_baseline_ceramides.csv")

# # # model 3: most adjustments (with diabpriormedinsulin_4)


#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

# run the model in a loop of ceramides (original model in paper)
for (i in ceramides){
  exposure = i 
  model <- glm(dmremission_12 ~ get(exposure) + age_1 + sexint + bmi_1 + race + diab_duration + diabmed_1 + hba1c_1 + diabpriormedinsulin_4, 
               # na.action = na.exclude,
               data=surgery_diab_data, family = binomial)
  temp_results<- cbind(exp(coef(model))[2], exp(confint(model))[2,1], exp(confint(model))[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("OR", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#provide column names
colnames(results) <- c("OR", "CI_L", "CI_U", "PVALUE")
#order
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 
results <- results[order(results$PVALUE),]

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

write_csv(results, "Results/diabetes_remission_model3_prior_insulin_baseline_ceramides.csv")



#####
# 2 year ceramdies 
ceramides <- c("scale_log_Cer(d18:0/16:0)_2","scale_log_Cer(d18:0/18:0)_2", "scale_log_Cer(d18:0/20:0)_2", "scale_log_Cer(d18:0/22:0)_2","scale_log_Cer(d18:0/24:0)_2",
               "scale_log_Cer(d18:0/24:1)_2", "scale_log_Cer(d18:1/16:0)_2", "scale_log_Cer(d18:1/18:0)_2", "scale_log_Cer(d18:1/20:0)_2", "scale_log_Cer(d18:1/22:0)_2",
               "scale_log_Cer(d18:1/24:0)_2", "scale_log_Cer(d18:1/24:1)_2")

#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

# run the model in a loop of ceramides (original model in paper)
for (i in ceramides){
  exposure = i 
  model <- glm(dmremission_12 ~ get(exposure) + age_1 + sexint + bmi_1 + race, 
               # na.action = na.exclude,
               data=surgery_diab_data, family = binomial)
  temp_results<- cbind(exp(coef(model))[2], exp(confint(model))[2,1], exp(confint(model))[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("OR", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#provide column names
colnames(results) <- c("OR", "CI_L", "CI_U", "PVALUE")
#order
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 
results <- results[order(results$PVALUE),]

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

write_csv(results, "Results/diabetes_remission_model1_2year_cermides.csv")

# # # model 2: more adjustments (but without hb1ac)

#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

# run the model in a loop of ceramides (original model in paper)
for (i in ceramides){
  exposure = i 
  model <- glm(dmremission_12 ~ get(exposure) + age_1 + sexint + bmi_1 + race + diab_duration + diabmed_1, 
               # na.action = na.exclude,
               data=surgery_diab_data, family = binomial)
  temp_results<- cbind(exp(coef(model))[2], exp(confint(model))[2,1], exp(confint(model))[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("OR", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#provide column names
colnames(results) <- c("OR", "CI_L", "CI_U", "PVALUE")
#order
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 
results <- results[order(results$PVALUE),]

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

#write_csv(results, "Results/diabetes_remission_model2_without_hba1c_1_2year_ceramides.csv")

# # # model 2: more adjustments 

#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

# run the model in a loop of ceramides (original model in paper)
for (i in ceramides){
  exposure = i 
  model <- glm(dmremission_12 ~ get(exposure) + age_1 + sexint + bmi_1 + race + diab_duration + diabmed_1 + hba1c_1, 
               # na.action = na.exclude,
               data=surgery_diab_data, family = binomial)
  temp_results<- cbind(exp(coef(model))[2], exp(confint(model))[2,1], exp(confint(model))[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("OR", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#provide column names
colnames(results) <- c("OR", "CI_L", "CI_U", "PVALUE")
#order
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 
results <- results[order(results$PVALUE),]

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

write_csv(results, "Results/diabetes_remission_model2_hba1c_1_2_year_ceramides.csv")

# # # model 3: most adjustments (with diabpriormedinsulin_4)


#create empty final results 
results <- data.frame(matrix(ncol = 4, nrow = 0))

# run the model in a loop of ceramides (original model in paper)
for (i in ceramides){
  exposure = i 
  model <- glm(dmremission_12 ~ get(exposure) + age_1 + sexint + bmi_1 + race + diab_duration + diabmed_1 + hba1c_1 + diabpriormedinsulin_4, 
               # na.action = na.exclude,
               data=surgery_diab_data, family = binomial)
  temp_results<- cbind(exp(coef(model))[2], exp(confint(model))[2,1], exp(confint(model))[2,2], tidy(model)[2,5])
  colnames(temp_results) <- c("OR", "CI_L", "CI_U", "PVALUE")
  row.names(temp_results) <- i 
  results <- rbind(results, temp_results)
}

#provide column names
colnames(results) <- c("OR", "CI_L", "CI_U", "PVALUE")
#order
results$BH <- p.adjust(results$PVALUE,  method = "BH") #adjusted p value 
results <- results[order(results$PVALUE),]

results <- results %>% 
  rownames_to_column(var="ceramide")%>% 
  remove_rownames

write_csv(results, "Results/diabetes_remission_model3_prior_insulin_2_year_ceramides.csv")


###
###
###

# # # Prediction Modeling / ROC AUC 
surgery_diab_data$`scale_log_Cer(d18:1/16:0)_1`
surgery_diab_data$`scale_log_Cer(d18:1/24:1)_1`

c16 <- glm(dmremission_12 ~ `scale_log_Cer(d18:1/16:0)_1` + age_1 + sex + bmi_1 + race, 
           data=surgery_diab_data, family = binomial)

c16_results<-  cbind(exp(coef(c16))[2], exp(confint(c16))[2,1], exp(confint(c16))[2,2], tidy(c16)[2,5])
colnames(c16_results) <- c("OR", "CI_L", "CI_U", "PVALUE")

c24_1 <- glm(dmremission_12 ~ `scale_log_Cer(d18:1/24:1)_1` + age_1 + sex + bmi_1 + race, 
             data=surgery_diab_data, family = binomial)
c24_1_results<-  cbind(exp(coef(c24_1))[2], exp(confint(c24_1))[2,1], exp(confint(c24_1))[2,2], tidy(c24_1)[2,5])
colnames(c24_1_results) <- c("OR", "CI_L", "CI_U", "PVALUE")



clinical <- glm(dmremission_12 ~ diab_duration + diabmed_1 + hba1c_1 + age_1 + sex + bmi_1 + race, , 
                data=surgery_diab_data, family = binomial)

summary(clinical)


clinical_c16 <- glm(dmremission_12 ~ `scale_log_Cer(d18:1/16:0)_1` +diab_duration + diabmed_1 + hba1c_1 + age_1 + sex + bmi_1 + race, , 
                    data=surgery_diab_data, family = binomial)
summary(clinical_c16)
clinical_24_1 <- glm(dmremission_12 ~ `scale_log_Cer(d18:1/24:1)_1` +diab_duration + diabmed_1 + hba1c_1 + age_1 + sex + bmi_1 + race, , 
                     data=surgery_diab_data, family = binomial)
summary(clinical_24_1)

surgery_diab_data$sex <- ifelse(surgery_diab_data$sex  %in% "M", 1, surgery_diab_data$sex)
surgery_diab_data$sex <- ifelse(surgery_diab_data$sex  %in% "F", 0, surgery_diab_data$sex)
surgery_diab_data$sex <- as.numeric(surgery_diab_data$sex)
library(pROC)

# # # basline (clinical + .each ceramides, clinical + all ceramides)

setwd("~/Box/GB Figures/Feb 2, 2020_Cell_Med_Reviews/ROC_AUC_FEB9")
# clincal 
tiff("baseline_clinical_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race), print.auc=TRUE, col= "black", main = "Clinical")
dev.off()

#clinical + c16 
tiff("clinical_c_16_baseline_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$`scale_log_Cer(d18:1/16:0)_1` + surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race), print.auc=TRUE, col= "blue", main = "Clinical + Cer(d18:1/16:0)")
dev.off()

#clinical + c18
tiff("clinical_c_18_baseline_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$`scale_log_Cer(d18:1/18:0)_1` + surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race), print.auc=TRUE, col= "blue", main = "Clinical + Cer(d18:1/18:0)")
dev.off()

#clinical + c20
tiff("clinical_c_20_baseline_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$`scale_log_Cer(d18:1/20:0)_1` + surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race), print.auc=TRUE, col= "blue", main = "Clinical + Cer(d18:1/20:0)")
dev.off()

#clinical + c22
tiff("clinical_c_22_baseline_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$`scale_log_Cer(d18:1/22:0)_1` + surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race), print.auc=TRUE, col= "blue", main = "Clinical + Cer(d18:1/22:0)")
dev.off()

#clinical + c24
tiff("clinical_c_24_baseline_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$`scale_log_Cer(d18:1/24:0)_1` + surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race), print.auc=TRUE, col= "blue", main = "Clinical + Cer(d18:1/24:0)")
dev.off()

#clinical + c24_1 
tiff("clinical_c_24_1_baseline_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$`scale_log_Cer(d18:1/24:1)_1` + surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race), print.auc=TRUE, col= "blue", main = "Clinical + Cer(d18:1/24:1)")
dev.off()

#clincal + all above 
tiff("clinical_all_baseline_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$`scale_log_Cer(d18:1/16:0)_1` +
           surgery_diab_data$`scale_log_Cer(d18:1/18:0)_1` + 
           surgery_diab_data$`scale_log_Cer(d18:1/20:0)_1` +
           surgery_diab_data$`scale_log_Cer(d18:1/22:0)_1` +
           surgery_diab_data$`scale_log_Cer(d18:1/24:0)_1` +
           surgery_diab_data$`scale_log_Cer(d18:1/24:1)_1` +
           surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race)
     , print.auc=TRUE, col= "purple", name = "Clincal + All Ceramides")
dev.off()


# # # 2 year  (clinical + .each ceramides, clinical + all ceramides)

#clinical + c16 
tiff("clinical_c_16_2_years_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$`scale_log_Cer(d18:1/16:0)_2` + surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race), print.auc=TRUE, col= "blue", main = "Clinical + Cer(d18:1/16:0)")
dev.off()

#clinical + c18
tiff("clinical_c_18_2_years_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$`scale_log_Cer(d18:1/18:0)_2` + surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race), print.auc=TRUE, col= "blue", main = "Clinical + Cer(d18:1/18:0)")
dev.off()

#clinical + c20
tiff("clinical_c_20_2_years_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$`scale_log_Cer(d18:1/20:0)_2` + surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race), print.auc=TRUE, col= "blue", main = "Clinical + Cer(d18:1/20:0)")
dev.off()

#clinical + c22
tiff("clinical_c_22_2_years_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$`scale_log_Cer(d18:1/22:0)_2` + surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race), print.auc=TRUE, col= "blue", main = "Clinical + Cer(d18:1/22:0)")
dev.off()

#clinical + c24
tiff("clinical_c_24_2_years_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$`scale_log_Cer(d18:1/24:0)_2` + surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race), print.auc=TRUE, col= "blue", main = "Clinical + Cer(d18:1/24:0)")
dev.off()

#clinical + c24_1 
tiff("clinical_c_24_1_2_years_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$`scale_log_Cer(d18:1/24:1)_2` + surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race), print.auc=TRUE, col= "blue", main = "Clinical + Cer(d18:1/24:1)")
dev.off()

#clincal + all above 
tiff("clinical_all_2_years_roc.tif", res=600, compression = "lzw", height=5, width=5, units="in")
plot(roc(surgery_diab_data$dmremission_12,  surgery_diab_data$`scale_log_Cer(d18:1/16:0)_2` +
           surgery_diab_data$`scale_log_Cer(d18:1/18:0)_2` + 
           surgery_diab_data$`scale_log_Cer(d18:1/20:0)_2` +
           surgery_diab_data$`scale_log_Cer(d18:1/22:0)_2` +
           surgery_diab_data$`scale_log_Cer(d18:1/24:0)_2` +
           surgery_diab_data$`scale_log_Cer(d18:1/24:1)_2` +
           surgery_diab_data$diab_duration + surgery_diab_data$diabmed_1 + surgery_diab_data$hba1c_1 + surgery_diab_data$age_1 + surgery_diab_data$sex + surgery_diab_data$bmi_1 + surgery_diab_data$race)
     , print.auc=TRUE, col= "purple", name = "Clincal + All Ceramides")
dev.off()